/*
NOTES TO DATA ANALYSIS:
	1. First stage regressions
*/

clear all
set more off
capture log close
********************************************************************************
* Open a dated log file. $logfiles is not defined in this file, so it must
* already be set when this do-file runs.
log using "$logfiles/analysis_firststage_`c(current_date)'.log", replace

* Load the cleaned survey data ($covidclean is likewise defined elsewhere).
use "$covidclean/smscovid_clean.dta", clear

***Keep relevant observations: consent taken, aged 18 or above, non-missing treatment arms
	* Stata orders missing values above every number, so a bare age>=18 would
	* also retain respondents whose age is missing. Exclude them explicitly.
	keep if consent==1 & age>=18 & !missing(age)
	keep if !missing(treatment_arm)
	count //3964 in the original run; re-check now that missing ages are excluded

***HOUSEKEEPING - BEHAVIOR TREATMENTS 
	
	***Create dummies with treatment arms interacted with behavior
		* Produces treatment_pooled_sd and treatment_pooled_hw, used in the
		* arm-specific first-stage regressions further down.
		foreach v in treatment_pooled {
			gen `v'_sd = `v'*behavior_sd
			gen `v'_hw = `v'*behavior_hw
		}
		
	***Font for table
		* Font options passed to the final outreg export as $font.
		global font "basefont(footnotesize) statfont(footnotesize; footnotesize; footnotesize; footnotesize) rtitlfont(footnotesize; footnotesize; footnotesize; footnotesize) ctitlfont(footnotesize) notefont(scriptsize)"
	
********************************************************************************
***DESCRIPTIVES
********************************************************************************

*Self-reported SMS receipt
tab smsreceive treatment_pooled, mi

*Self-reported SMS counts
summ smscount, d
summ smscount if smsreceive==1
count if missing(smscount) & smsreceive==1 //41
tab smscount treatment_pooled, mi

	*Set the count to zero for respondents reporting no SMS, then winsorize at
	*the 5th/95th percentiles (extreme values are capped, not set to missing)
	replace smscount=0 if smsreceive==0
	winsor2 smscount, cuts(5 95) replace
	tab smscount treatment_pooled, mi
	bys treatment_pooled: summ smscount
		
*Number of observations in the analysis sample with failed / undelivered SMS
*Missing values compare as larger than any number in Stata, so a bare ">0"
*would count observations with no delivery data; guard with !missing().
count if failed_frac>0 & !missing(failed_frac)
count if (failed_frac>0 & !missing(failed_frac)) | (notdelivered_frac>0 & !missing(notdelivered_frac))

*Total delivered SMS per interview round, plotted on a temporary collapsed
*copy of the data; restore brings the respondent-level data back afterwards
preserve

collapse (sum) delivered_count, by(roundofinterview)

tw scatter delivered_count roundofinterview

restore


********************************************************************************
***FIRST STAGE REGRESSION
********************************************************************************

	*** ITT regressions: each SMS outcome on the pooled treatment indicator

	foreach outcome in smsreceive smscount smscontent_sd smscontent_hw {

		*Outcome mean among treatment_pooled==0 observations in the sample
		mean `outcome' if treatment_pooled==0 & $sample
		local ctrl_mean : di %9.2f r(table)[1,1]

		*Regression with $studycontrols and $covariates absorbed, robust SEs
		reghdfe `outcome' treatment_pooled if $sample, a($studycontrols $covariates) vce(robust)
		local fstat : di %9.2f `e(F)'

		*Store a one-column table named <outcome>_p for later merging
		outreg, keep(treatment_pooled) store(`outcome'_p) se starloc(1) ///
			rtitle("Pooled treatment") ///
			addrows("Control Mean", `ctrl_mean' \ "F-statistic", `fstat')

	}

	** SMS content by each target arm (sd: social distancing, hw: handwashing)

	foreach arm in sd hw {

		*Upper-case arm code for the row title ("SD" / "HW")
		local arm_label = upper("`arm'")

		*Outcome mean where the arm-specific treatment indicator is zero
		mean smscontent_`arm' if treatment_pooled_`arm'==0 & $sample
		local ctrl_mean : di %9.2f r(table)[1,1]

		*Same specification as above, with the arm-specific regressor
		reghdfe smscontent_`arm' treatment_pooled_`arm' if $sample, a($studycontrols $covariates) vce(robust)
		local fstat : di %9.2f `e(F)'

		*Store a one-column table named smscontent_<arm>
		outreg, keep(treatment_pooled_`arm') store(smscontent_`arm') se starloc(1) ///
			rtitle("Treatment - `arm_label'") ///
			addrows("Control Mean", `ctrl_mean' \ "F-statistic", `fstat')

	}
				
	
********************************************************************************
***COMPILE OUTPUTS
********************************************************************************

	*Stored tables in the left-to-right column order of the final table:
	*pooled any-SMS, pooled SMS count, then for SD and HW content the pooled
	*column followed by the arm-specific column
	local fs_columns smsreceive_p smscount_p smscontent_sd_p smscontent_sd smscontent_hw_p smscontent_hw

	*Merge each stored table into the accumulating table pooled_fs
	foreach tbl of local fs_columns {
		outreg, replay(pooled_fs) merge(`tbl') store(pooled_fs) nodisplay
	}

	*Write the combined table to $tables/firststage as a TeX fragment,
	*with two rows of column titles and the font options held in $font
	outreg using "$tables/firststage", replay(pooled_fs) store(pooled_fs) ///
		tex fr replace hlines(1{0};1{0};1{0}1;1{0}1) ///
		ctitles("", "Any SMS", "$ #$ SMS", "SD SMS","SD SMS", "HW SMS", "HW SMS" ///
		\ "", "", "", "$ |$ Any SMS", "$ |$ Any SMS", "$ |$ Any SMS", "$ |$ Any SMS") ///
		note("") $font

*Close the log and exit
log close
exit, clear
